Pacages for the script
Mean function for ggplot2 plots - the function adds a label with the column mean.
meanFunction <- function(x){
return(data.frame(y=round(mean(x),2),label=round(mean(x,na.rm=T),2)))}
themes to use with ggplot2
themes_ggplot <- theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
font("xlab", size = 12, color = "black", face = "bold") +
font("ylab", size = 12, color = "black", face = "bold") +
font("title", size = 14, color = "black", face = "bold")+
font("legendtitle", size = 12, color = "black", face = "bold")+
font("legendtext", size = 12, color = "black")+
font("xy.text", size = 12, color = "black")+
font("ylab", size = 12, color = "black", face = "bold") +
font("xlab", size = 12, color = "black", face = "bold") +
theme(strip.text.x = element_text( size = 12, color = "black", face = "bold"))+
theme(strip.text.y = element_text( size = 12, color = "black", face = "bold"))+
theme(legend.position='top')
The data were downloded from SRA by SRAtoolkit. The information about each sample were obtained from the dbGAP database.
sra_table_path <- "raw/Cornell/SraRunTable.txt"
dbgap_data_path <- "raw/Cornell/combined_sample_subject_attribute.csv"
#read and combine information table together
dbgap_table <- read.csv(file= dbgap_data_path, stringsAsFactors=FALSE)
sra_table_part <- read.csv(file= sra_table_path, stringsAsFactors=FALSE)
sra_table <- left_join(dbgap_table,sra_table_part,by = c("BioSample.Accession"="BioSample"))
#take only required columns
vector_sra_colmns <- c("RT_PCR","VIRAL_LOAD")
sra_table <- dplyr::select(sra_table,c("Run",vector_sra_colmns))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(vector_sra_colmns)` instead of `vector_sra_colmns` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
#Create a new column of controls and COVID-19 patients, based on viral load values.
sra_table$Covid_Load <- NA
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "None"] <- "Control"
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "Low"] <- "COVID-19"
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "Medium"] <- "COVID-19"
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "High"] <- "COVID-19"
sra_table$Covid_Load[sra_table$VIRAL_LOAD == "OtherViralInfection"] <- "Other Viruses"
#orgenize the values in each column
sra_table$RT_PCR <- factor(sra_table$RT_PCR,levels = c("NotDetected","Detected"))
sra_table$VIRAL_LOAD <- factor(sra_table$VIRAL_LOAD ,levels = c("None","Low",
"Medium", "High",
"OtherViralInfection"))
sra_table$Covid_Load <- factor(sra_table$Covid_Load ,levels = c("Control","Other Viruses", "COVID-19"))
read ADAR expression levels.
salmon_path <- "raw/Cornell/SalmonTPM_ADAR.wideGenes.csv"
df_salmon <- read.csv(file=salmon_path,header=T, stringsAsFactors=FALSE)
# sra_table$Run[!sra_table$Run %in% df_salmon$Sample] # detect failed samples
# length(df_salmon$Sample) == length(unique(df_salmon$Sample))
# detect samples with no values at all.
# df_salmon[ c(df_salmon$ADARB1 == 0 & df_salmon$ADARB2 == 0 &
# df_salmon$ADAR == 0 & df_salmon$ADARB2.AS1 == 0 ),]
# df_salmon$Sample[ c(df_salmon$ADARB1 == 0 & df_salmon$ADARB2 == 0 &
# df_salmon$ADAR == 0 & df_salmon$ADARB2.AS1 == 0 )]
#read and combine ISG score.
df_ISG <- read.csv("raw/Cornell/ArticleCalcLog2_ISG_madz_scores.csv")
df_salmon <- left_join(df_salmon, df_ISG, by="Sample")
STAR_stats <- read.csv("raw/Cornell/STAR_stats.csv", check.names = F)
# length(STAR_stats$Sample)
# length(unique(STAR_stats$Sample))
#remove duplicated lines and samples that did not map at all.
STAR_stats_removedDup <- unique(STAR_stats)
STAR_stats_removedDup <- STAR_stats_removedDup[STAR_stats_removedDup$`Uniquely mapped reads number`>0,]
aluIndex_table_path <-"raw/Cornell/ALuIndexEditingIndex.csv"
alu_table_index = read.csv(file=aluIndex_table_path,header=T, stringsAsFactors=FALSE)
alu_table_index$Sample <- gsub(".", "", alu_table_index$Sample,fixed = TRUE) #fix sample name if needed.
names(alu_table_index)[names(alu_table_index) == "Sample"] <- "Run"
#select requird columns
alu_table_index_combined <- dplyr::select(alu_table_index,"Run",
"A2GEditingIndex","A2TEditingIndex",
"C2AEditingIndex","C2TEditingIndex",
"C2GEditingIndex","A2CEditingIndex")
# filter the samples from failed samples and duplicated samples.
alu_table_index_combined <- unique(alu_table_index_combined)
alu_table_index_above0 <- alu_table_index_combined[!
c(alu_table_index_combined$A2GEditingIndex == 0 &
alu_table_index_combined$A2TEditingIndex == 0 &
alu_table_index_combined$C2AEditingIndex == 0 &
alu_table_index_combined$C2TEditingIndex == 0 &
alu_table_index_combined$C2GEditingIndex == 0 &
alu_table_index_combined$A2CEditingIndex ==0),]
alu_table_index_combined <- alu_table_index_above0
#change the column names
names(alu_table_index_combined) <- gsub("EditingIndex","EditingIndex_Alu",
names(alu_table_index_combined))
alu3UTR_table_path <- "raw/Cornell/EditingIndex_sammary_3UTR.csv"
alu3UTR_table_index = read.csv(file=alu3UTR_table_path,header=T, stringsAsFactors=FALSE)
alu3UTR_table_index$Sample <- gsub(".", "", alu3UTR_table_index$Sample,fixed = TRUE) #fix sample name if needed.
names(alu3UTR_table_index)[names(alu3UTR_table_index) == "Sample"] <- "Run"
alu3UTR_table_index_combined <- dplyr::select(alu3UTR_table_index,"Run",#vector_sra_colmns,
"A2GEditingIndex","A2TEditingIndex",
"C2AEditingIndex","C2TEditingIndex",
"C2GEditingIndex","A2CEditingIndex")
# filter the samples from failed samples and duplicated samples.
alu3UTR_table_index_combined <- unique(alu3UTR_table_index_combined)
alu3UTR_table_index_combined <- alu3UTR_table_index_combined[!
c(alu3UTR_table_index_combined$A2GEditingIndex == 0 &
alu3UTR_table_index_combined$A2TEditingIndex == 0 &
alu3UTR_table_index_combined$C2AEditingIndex == 0 &
alu3UTR_table_index_combined$C2TEditingIndex == 0 &
alu3UTR_table_index_combined$C2GEditingIndex == 0 &
alu3UTR_table_index_combined$A2CEditingIndex ==0),]
#change the column names
names(alu3UTR_table_index_combined) <- gsub("EditingIndex","EditingIndex_3UTR",
names(alu3UTR_table_index_combined))
sra_table[!sra_table$Run %in% df_salmon$Sample,"Covid_Load"]
## [1] Control Control COVID-19 Control
## Levels: Control Other Viruses COVID-19
sra_table[!sra_table$Run %in% df_ISG$Sample,"Covid_Load"]
## [1] Control Control COVID-19 Control
## Levels: Control Other Viruses COVID-19
sra_table[!sra_table$Run %in% STAR_stats_removedDup$Sample,"Covid_Load"]
## [1] Control Control COVID-19 Control
## Levels: Control Other Viruses COVID-19
table(sra_table[!sra_table$Run %in% alu_table_index_combined$Run,"Covid_Load"])
##
## Control Other Viruses COVID-19
## 14 1 2
table(left_join(STAR_stats_removedDup,sra_table,by=c("Sample"="Run"))[!STAR_stats_removedDup$Sample %in% alu_table_index_combined$Run,"Covid_Load"])
##
## Control Other Viruses COVID-19
## 11 1 1
# sra_table[!sra_table$Run %in% df_salmon$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% df_ISG$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% STAR_stats_removedDup$Sample,c("Run","Covid_Load")]
# sra_table[!sra_table$Run %in% alu_table_index_combined$Run,c("Run","Covid_Load")]
table(sra_table[!sra_table$Run %in% alu_table_index_combined$Run,c("Covid_Load")])
##
## Control Other Viruses COVID-19
## 14 1 2
table(left_join(STAR_stats_removedDup,sra_table,by=c("Sample"="Run"))[!STAR_stats_removedDup$Sample %in% alu_table_index_combined$Run,"Covid_Load"])
##
## Control Other Viruses COVID-19
## 11 1 1
OUT_DIR = "results/Cornell/"
dir.create(OUT_DIR)
## Warning in dir.create(OUT_DIR): 'results/Cornell' already exists
#combine all tables to one table
df_combined <- left_join(alu_table_index_combined,sra_table ,by="Run")
df_combined <- left_join(df_combined,alu3UTR_table_index_combined ,by="Run")
df_combined <- left_join(df_combined,df_salmon,by=c("Run"="Sample"))
df_combined <- left_join(df_combined,STAR_stats_removedDup,by=c("Run"="Sample"))
#verify that there no duplicated samples
length(unique(df_combined$Run)) == length(df_combined$Run)
## [1] TRUE
out_path <- paste0(OUT_DIR,"/combinedResults_MainResults.csv")
write.csv(df_combined, out_path, row.names = F, quote = F)
df_combined %>%
filter(Covid_Load != "Other Viruses" ) %>%
mutate(Sample = Run) %>%
dplyr::select(Sample, A2GEditingIndex_Alu) %>% write.csv(paste0(OUT_DIR,"/AEI_sample.csv"),
row.names = F, quote = F)
OUT_DIR = "results/Cornell/"
dir.create(OUT_DIR)
## Warning in dir.create(OUT_DIR): 'results/Cornell' already exists
Generally summarize the results.
df_combined_Stats <- df_combined %>%
group_by(Covid_Load) %>% #nrow()
summarize(ADAR = paste(round(mean(ADAR),2), "(",round(min(ADAR),2),", ",
round(max(ADAR),2),", ",round(median(ADAR),2),", +-",
round(sd(ADAR),2),")"),
ADARB1 = paste(round(mean(ADARB1),2), "(",round(min(ADARB1),2),", ",
round(max(ADARB1),2),", ",round(median(ADARB1),2),", +-",
round(sd(ADARB1),2),")"),
ADARB2 = paste(round(mean(ADARB2),2), "(",round(min(ADARB2),2),", ",
round(max(ADARB2),2),", ",round(median(ADARB2),2),", +-",
round(sd(ADARB2),2),")"),
ADARB2.AS1 = paste(round(mean(ADARB2.AS1),2), "(",round(min(ADARB2.AS1),2),", ",
round(max(ADARB2.AS1),2),", ",round(median(ADARB2.AS1),2),", +-",
round(sd(ADARB2.AS1),2),")"),
ISG38_score = paste(round(mean(ISG38_score),2), "(",round(min(ISG38_score),2),", ",
round(max(ISG38_score),2),", ",round(median(ISG38_score),2), ", +-",
round(sd(ISG38_score),2),")"),
A2GEditingIndex_Alu = paste(round(mean(A2GEditingIndex_Alu),2), "(",
round(min(A2GEditingIndex_Alu),2),", ",
round(max(A2GEditingIndex_Alu),2),", ",
round(median(A2GEditingIndex_Alu),2), ", +-",
round(sd(A2GEditingIndex_Alu),2),")"),
A2GEditingIndex_3UTR = paste(round(mean(A2GEditingIndex_3UTR),2), "(",
round(min(A2GEditingIndex_3UTR),2),", ",
round(max(A2GEditingIndex_3UTR),2),", ",
round(median(A2GEditingIndex_3UTR),2),", +-",
round(sd(A2GEditingIndex_3UTR),2),")")
)
write.csv(df_combined_Stats, paste0(OUT_DIR,"/combinedResults_mean_min_max_median_sd.csv"),
row.names = F, quote = F)
df_combined %>%
group_by(VIRAL_LOAD) %>%
summarize(ADAR = paste(round(mean(ADAR),2), "(",round(min(ADAR),2),", ",
round(max(ADAR),2),", ",round(median(ADAR),2),")"),
ADARB1 = paste(round(mean(ADARB1),2), "(",round(min(ADARB1),2),", ",
round(max(ADARB1),2),", ",round(median(ADARB1),2),")"),
ADARB2 = paste(round(mean(ADARB2),2), "(",round(min(ADARB2),2),", ",
round(max(ADARB2),2),", ",round(median(ADARB2),2),")"),
ADARB2.AS1 = paste(round(mean(ADARB2.AS1),2), "(",round(min(ADARB2.AS1),2),", ",
round(max(ADARB2.AS1),2),", ",round(median(ADARB2.AS1),2),")"),
ISG38_score = paste(round(mean(ISG38_score),2), "(",round(min(ISG38_score),2),", ",
round(max(ISG38_score),2),", ",round(median(ISG38_score),2),")"),
A2GEditingIndex_Alu = paste(round(mean(A2GEditingIndex_Alu),2), "(",
round(min(A2GEditingIndex_Alu),2),", ",
round(max(A2GEditingIndex_Alu),2),", ",
round(median(A2GEditingIndex_Alu),2),")"),
A2GEditingIndex_3UTR = paste(round(mean(A2GEditingIndex_3UTR),2), "(",
round(min(A2GEditingIndex_3UTR),2),", ",
round(max(A2GEditingIndex_3UTR),2),", ",
round(median(A2GEditingIndex_3UTR),2),")")
)
## # A tibble: 5 × 8
## VIRAL_LOAD ADAR ADARB1 ADARB2 ADARB2.AS1 ISG38_score A2GEditingIndex…
## <fct> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 None 90.4… 3.52 … 1.65 … 0.01 ( 0 … -0.39 ( -4… 1.62 ( 0.09 , …
## 2 Low 131.… 6.54 … 1.96 … 0 ( 0 , … 0.04 ( -4.… 1.67 ( 0.18 , …
## 3 Medium 176.… 5.14 … 1.13 … 0 ( 0 , … 0.44 ( -1.… 1.83 ( 0.63 , …
## 4 High 201.… 4.11 … 1.33 … 0.01 ( 0 … 0.64 ( -0.… 1.91 ( 0.47 , …
## 5 OtherViralInfecti… 152.… 5.1 (… 1.17 … 0.02 ( 0 … 0.28 ( -0.… 1.73 ( 0.45 , …
## # … with 1 more variable: A2GEditingIndex_3UTR <chr>
y_position = max(df_combined$ADAR)
c <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
compare_means(ADAR~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
adar1 <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=ADAR, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1
out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR1_CovidComb.pdf")
ggsave(out_path, adar1, width = 10, height = 6)
y_position = max(df_combined$ADARB1)
c <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
compare_means(ADARB1~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns" #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
adar2 <-
df_combined %>% filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=ADARB1, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
facet_grid( ~ "ADAR2" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar2
out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR2_CovidComb.pdf")
ggsave(out_path, adar2, width = 10, height = 6)
y_position = max(df_combined$ADARB2)
c <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
compare_means(ADARB2~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"#c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
adar3 <-
df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=ADARB2, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
facet_grid( ~ "ADAR3" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue", "red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar3
out_path <- paste0(OUT_DIR,'/',"SALMON_ADAR3_CovidComb.pdf")
ggsave(out_path, adar3, width = 10, height = 6)
plot_grid(adar1, adar2, adar3, labels = c('', '',''), label_size = 12, ncol=3)
out_path <- paste0(OUT_DIR,'/',"SALMON_all_adars_CovidComb.pdf")
ggsave(out_path, width = 14, height = 6)
y_position = max(df_combined$ADAR)
c <- df_combined %>%
# filter(Gene_change == "ADAR1") %>%
compare_means(ADAR~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
# c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"
df_c <- as.data.frame(c)
adar1_viralLoad <- ggplot(data=df_combined,
aes(y=ADAR, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_SALMON_ADAR1.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$ADAR)
c <- df_combined %>%
filter(Covid_Load == "COVID-19") %>%
compare_means(ADAR~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"
df_c <- as.data.frame(c)
adar1_viralLoad <-
df_combined %>%
filter(Covid_Load == "COVID-19") %>%
ggplot(aes(y=ADAR, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Normalized Expression (TPM)")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_SALMON_ADAR1_onlyCOVID.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$ISG38_score)
c <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
compare_means(ISG38_score~Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"#c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
ISG38 <-
df_combined%>%
filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=ISG38_score, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
themes_ggplot+ ylab("Interferon-Stimulated Genes Score")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
ISG38
out_path <- paste0(OUT_DIR,'/',"ISG38_CovidComb.pdf")
ggsave(out_path, ISG38, width = 10, height = 6)
y_position = max(df_combined$ISG38_score)
c <- df_combined %>%
# filter(Gene_change == "ADAR1") %>%
compare_means(ISG38_score ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
df_c <- as.data.frame(c)
adar1_viralLoad <- ggplot(data=df_combined,
aes(y=ISG38_score, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
# facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("Interferon-Stimulated Genes Score")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_ISG38_score.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
edit_type_names <- c("A2GEditingIndex_Alu" = "A-G (T-C)", "A2TEditingIndex_Alu" = "A-T (T-A)",
"C2AEditingIndex_Alu" = "C-A (G-T)", "C2TEditingIndex_Alu" = "C-T (G-A)",
"C2GEditingIndex_Alu" = "C-G (G-C)", "A2CEditingIndex_Alu" = "A-C (T-G)")
AG_clean <-df_combined %>%
dplyr::select("Run", "A2GEditingIndex_Alu","A2TEditingIndex_Alu",
"C2AEditingIndex_Alu","C2TEditingIndex_Alu","C2GEditingIndex_Alu","A2CEditingIndex_Alu") %>%
gather(edit_type, value, -Run) %>%
ggplot(
aes(y=value, x=edit_type ,fill=edit_type))+
geom_boxplot(aes(fill=edit_type)) +
themes_ggplot+
# theme(axis.text.x = element_text(angle = 90))+
ylab("ALU Editing Index")+
xlab ("Nucleotide mismatch")+
scale_fill_discrete(name = "Mistmatch:")+
theme(axis.title.x = element_blank())+
theme(legend.position = "none") +
scale_fill_manual(values=c("yellow", "red", "blue", "green", "orange", "black"))+
scale_x_discrete(labels= edit_type_names)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
AG_clean
out_path <- paste0(OUT_DIR,'/',"AluEditing_cleanSignal.pdf")
ggsave(out_path, AG_clean, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_Alu)
df_combined %>%
mutate(AG = A2GEditingIndex_Alu) %>%
compare_means(AG ~ Covid_Load, data = .)
## # A tibble: 3 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 AG Control Other Viruses 0.126 0.13 0.126 ns Wilcox…
## 2 AG Control COVID-19 0.00000672 0.00002 6.7e-06 **** Wilcox…
## 3 AG Other Viruses COVID-19 0.0563 0.11 0.056 ns Wilcox…
c <- df_combined %>%
mutate(AG = A2GEditingIndex_Alu) %>%
filter(Covid_Load != "Other Viruses") %>%
compare_means(AG ~ Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns" #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
AluEditingIndex <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=A2GEditingIndex_Alu, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
AluEditingIndex
out_path <- paste0(OUT_DIR,'/',"AluEditing_AG.pdf")
ggsave(out_path, AluEditingIndex, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_Alu)
c <- df_combined %>%
# filter(Gene_change == "ADAR1") %>%
mutate(AG = A2GEditingIndex_Alu) %>%
compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2, y_position *1.3, y_position*1.4,y_position *1.5,
y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
df_c <- as.data.frame(c)
adar1_viralLoad <- ggplot(data=df_combined,
aes(y=A2GEditingIndex_Alu, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
# facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_Alu_Index.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$A2GEditingIndex_Alu)
c <- df_combined %>%
filter(Covid_Load == "COVID-19") %>%
mutate(AG = A2GEditingIndex_Alu) %>%
compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
df_c <- as.data.frame(c)
adar1_viralLoad <-
df_combined %>%
filter(Covid_Load == "COVID-19") %>%
ggplot(aes(y=A2GEditingIndex_Alu, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
# facet_grid( ~ "ADAR1" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_NoControl_index.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
edit_type_names <- c("A2GEditingIndex_3UTR" = "A-G (T-C)", "A2TEditingIndex_3UTR" = "A-T (T-A)",
"C2AEditingIndex_3UTR" = "C-A (G-T)", "C2TEditingIndex_3UTR" = "C-T (G-A)",
"C2GEditingIndex_3UTR" = "C-G (G-C)", "A2CEditingIndex_3UTR" = "A-C (T-G)")
AG_3UTR_clean <-df_combined %>%
dplyr::select("Run", "A2GEditingIndex_3UTR","A2TEditingIndex_3UTR",
"C2AEditingIndex_3UTR","C2TEditingIndex_3UTR","C2GEditingIndex_3UTR","A2CEditingIndex_3UTR") %>%
gather(edit_type, value, -Run) %>%
ggplot(
aes(y=value, x=edit_type ,fill=edit_type))+
geom_boxplot(aes(fill=edit_type)) +
themes_ggplot+
ylab("3'UTR ALU Editing Index")+
xlab ("Nucleotide mismatch")+
scale_fill_discrete(name = "Mistmatch:")+
theme(axis.title.x = element_blank())+
theme(legend.position = "none") +
scale_fill_manual(values=c("yellow", "red", "blue", "green", "orange", "black"))+
scale_x_discrete(labels= edit_type_names)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
AG_3UTR_clean
out_path <- paste0(OUT_DIR,'/',"3UTREditing_cleanSignal.pdf")
ggsave(out_path, AG_3UTR_clean, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_3UTR)
df_combined %>%
mutate(AG = A2GEditingIndex_3UTR) %>%
compare_means(AG ~ Covid_Load, data = .)
## # A tibble: 3 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 AG Control Other Viruses 2.43e- 4 2.4e- 4 0.00024 *** Wilcoxon
## 2 AG Control COVID-19 7.03e-33 2.1e-32 < 2e-16 **** Wilcoxon
## 3 AG Other Viruses COVID-19 8.39e- 8 1.7e- 7 8.4e-08 **** Wilcoxon
c <- df_combined %>%
mutate(AG = A2GEditingIndex_3UTR) %>%
compare_means(AG ~ Covid_Load, data = .)
c %<>% mutate(y_pos = c(y_position *1,y_position *1.1,y_position *1.2),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns" #c$p.adj.format.stars
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars <- paste0(c$p.adj.format.stars," (p=",c$p.adj,")")
c$p.adj.format.stars[grep("ns",c$p.adj.format.stars) ] <- "ns"
df_c <- as.data.frame(c)
UTR3EditingIndex <- df_combined%>%
# filter(Covid_Load != "Other Viruses") %>%
ggplot(
aes(y=A2GEditingIndex_3UTR, x=Covid_Load))+
geom_boxplot(aes(fill=Covid_Load)) + geom_jitter( size=2, alpha = 0.2)+
# facet_grid( ~ "3'UTR Editing Index" , drop = TRUE, scales = "free", space = "free",)+
themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
UTR3EditingIndex
out_path <- paste0(OUT_DIR,'/',"3UTRAluEditing_AG.pdf")
ggsave(out_path, UTR3EditingIndex, width = 10, height = 6)
y_position = max(df_combined$A2GEditingIndex_3UTR)
c <- df_combined %>%
mutate(AG = A2GEditingIndex_3UTR) %>%
compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2,
y_position *1.3, y_position*1.4,y_position *1.5,
y_position *1.6, y_position*1.7,y_position *1.8, y_position *1.9),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
df_c <- as.data.frame(c)
adar1_viralLoad <- ggplot(data=df_combined,
aes(y=A2GEditingIndex_3UTR, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("blue","yellow", "orange","red","purple"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_Alu_3utrIndex.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
y_position = max(df_combined$A2GEditingIndex_3UTR)
c <- df_combined %>%
filter(Covid_Load == "COVID-19") %>%
mutate(AG = A2GEditingIndex_3UTR) %>%
compare_means(AG ~VIRAL_LOAD, data = .)
c %<>% mutate(y_pos = c(y_position *1, y_position*1.1,y_position *1.2),
labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# c <- c[c(1,3,2),] #reorder manually sorry
c$p.adj.format <- c$p.adj
c$p.adj.format[c$p.adj < 2e-16] <- "< 2e-16"
c$p.adj.format[as.numeric(c$p.adj) > 0.1] <- "N.S"
c$p.adj.format.stars <- c$p.adj
c$p.adj.format.stars[c$p.adj >= 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.1] <- "ns"
c$p.adj.format.stars[c$p.adj < 0.05] <- "*"
c$p.adj.format.stars[c$p.adj < 0.01] <- "**"
c$p.adj.format.stars[c$p.adj < 0.001] <- "***"
c$p.adj.format.stars[c$p.adj < 2e-16] <- "****"
c$p.adj.format.stars[c$p.adj < 0.05] <- paste0("* (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.01] <- paste0("** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 0.001] <- paste0("*** (p=", c$p.adj,")")
c$p.adj.format.stars[c$p.adj < 2e-16] <- paste0("**** (p=", c$p.adj,")")
df_c <- as.data.frame(c)
adar1_viralLoad <-
df_combined %>%
filter(Covid_Load == "COVID-19") %>%
ggplot(aes(y=A2GEditingIndex_3UTR, x=VIRAL_LOAD))+
geom_boxplot(aes(fill=VIRAL_LOAD)) + geom_jitter( size=2, alpha = 0.2)+
themes_ggplot+ ylab("3'UTR ALU Editing Index")+ xlab ("Group")+ scale_fill_discrete(name = "Group:")+
expand_limits(y = 0) + scale_fill_manual(values=c("yellow", "orange","red"))+
theme(axis.title.x = element_blank())+ theme(legend.position = "none")+
geom_signif(data = df_c,
aes(xmin=group1, xmax=group2, annotations=p.adj.format.stars, y_position=y_pos),manual = TRUE)
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
adar1_viralLoad
out_path <- paste0(OUT_DIR,'/',"ViralLoad_NoControl_3utrindex.pdf")
ggsave(out_path, adar1_viralLoad, width = 10, height = 7)
corr_3UTR_ADAR <- df_combined %>%
ggplot(aes(y=A2GEditingIndex_3UTR, x=ADAR))+
geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
ylab("3'UTR ALU Editing Index")+ xlab ("ADAR1 Normalized Expression (TPM)")
corr_3UTR_ADAR
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_3UTR_ADAR.pdf")
ggsave(out_path, corr_3UTR_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_ADAR <- df_combined %>%
ggplot(aes(y=ISG38_score, x=ADAR))+
geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='loess', se=F, colour ="black")+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
ylab("Interferon-Stimulated Genes Score")+ xlab ("ADAR1 Normalized Expression (TPM)")
corr_ISG_ADAR
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_ISG_ADAR.pdf")
ggsave(out_path, corr_ISG_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_UTR3 <- df_combined %>%
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")
corr_ISG_UTR3
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3.pdf")
ggsave(out_path, corr_ISG_ADAR, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
df_combined$VIRAL_LOAD_changedName <- as.character(df_combined$VIRAL_LOAD)
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "None"] <- "Contol"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "Low"] <- "Low\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "Medium"] <- "Medium\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "High"] <- "High\nviral load"
df_combined$VIRAL_LOAD_changedName[df_combined$VIRAL_LOAD_changedName == "OtherViralInfection"] <- "Other Viruses"
df_combined$VIRAL_LOAD_changedName <-factor(df_combined$VIRAL_LOAD_changedName, levels = c(
"Contol", "Other Viruses", "Low\nviral load", "Medium\nviral load", "High\nviral load"
))
corr_ISG_UTR3_viral <- df_combined %>%
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
geom_point(aes(color=VIRAL_LOAD_changedName),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow", "orange","red","purple"), name = "Group:") +
xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")
corr_ISG_UTR3_viral
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral.pdf")
ggsave(out_path, corr_ISG_UTR3_viral, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'
df_combined$VIRAL_LOAD <- factor(df_combined$VIRAL_LOAD, levels = c("None","OtherViralInfection","Low" ,"Medium", "High" ))
corr_ISG_UTR3_viral_grid <- df_combined %>%
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
geom_point(aes(color=Covid_Load),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","red"), name = "Group:") +
facet_grid(~VIRAL_LOAD_changedName)+
xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index")
corr_ISG_UTR3_viral_grid
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral_grid.pdf")
ggsave(out_path, corr_ISG_UTR3_viral_grid, width = 14, height = 6)
## `geom_smooth()` using formula 'y ~ x'
corr_ISG_UTR3_viral_grid <- df_combined %>%
filter(Covid_Load != "Other Viruses") %>%
ggplot(aes(x=A2GEditingIndex_3UTR, y=ISG38_score))+
geom_point(aes(color=VIRAL_LOAD_changedName),size=3, alpha = 0.4) + themes_ggplot+ geom_smooth(method='lm', se=F, colour ="black")+# facet_grid(~Covid_Load)+
stat_cor(method = "pearson", label.x.npc = "left", label.y.npc = "top") + scale_color_manual(values=c("blue","yellow","orange","red"), name = "Group:") +
facet_grid(~VIRAL_LOAD_changedName)+
xlab("Interferon-Stimulated Genes Score")+ ylab ("3'UTR ALU Editing Index") + theme(legend.position="none")
corr_ISG_UTR3_viral_grid
## `geom_smooth()` using formula 'y ~ x'
out_path <- paste0(OUT_DIR,'/',"corr_corr_ISG_UTR3_viral_grid_onlyCOVID2.pdf")
ggsave(out_path, corr_ISG_UTR3_viral_grid, width = 10, height = 6)
## `geom_smooth()` using formula 'y ~ x'